# coding:utf-8

import scrapy
import lxml
from ..items import MyspItem

# scrapy crawl cnblogs

class myspider(scrapy.Spider):
    """Spider that scrapes post metadata from qiyeboy's cnblogs blog.

    Crawls the paginated post list starting at page 1, yields one
    MyspItem per post, and follows the "next page" link until exhausted.
    Run with: ``scrapy crawl cnblogs``.
    """
    name = "cnblogs"
    allowed_domains = ["cnblogs.com"]
    start_urls = ["https://www.cnblogs.com/qiyeboy/default.html?page=1"]

    def parse(self, response):
        """Parse one list page.

        :param response: scrapy Response for a blog list page.
        :yields: MyspItem for each post, then a Request for the next page.
        """
        # Each post is wrapped in a <div class="day"> container.
        for paper in response.xpath("//div[@class='day']"):
            # extract_first() returns None instead of raising IndexError
            # when a post is missing one of the fields.
            url = paper.xpath(".//*[@class='postTitle']/a/@href").extract_first()
            title = paper.xpath(".//*[@class='postTitle']/a/text()").extract_first()
            # 'posted' avoids shadowing the stdlib `time` module.
            posted = paper.xpath(".//*[@class='dayTitle']/a/text()").extract_first()
            # BUG FIX: content previously reused the postTitle xpath and
            # always duplicated the title; the post summary lives in the
            # <div class="postCon"> block.
            content = paper.xpath(".//*[@class='postCon']/div/text()").extract_first()

            yield MyspItem(url=url, title=title, time=posted, content=content)

        # Raw string prevents the invalid '\S' escape warning.  The "下一页"
        # anchor text ("next page") is part of the scraped site and must stay
        # byte-identical.
        next_page = response.selector.re(r'<a href="(\S*)">下一页</a>')
        if next_page:
            # urljoin keeps absolute hrefs unchanged and resolves relative ones.
            yield scrapy.Request(url=response.urljoin(next_page[0]),
                                 callback=self.parse)
